In [ ]:
# Import the Python libraries we need
import pandas as pd
import numpy as np
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt

In [ ]:
# Import the data
# NOTE(review): this is an absolute path on one specific machine; point it at
# your own copy of Accidents7904.csv before running the notebook.
accidents_data_file = '/Users/robert.dempsey/Dropbox/Private/Art of Skill Hacking/Books/' \
                      'Python Business Intelligence Cookbook/Data/Stats19-Data1979-2004/Accidents7904.csv'

# Read the CSV into a DataFrame.
# - on_bad_lines='warn' skips malformed rows and reports them; it replaces the
#   removed error_bad_lines=False / warn_bad_lines=True pair.
# - tupleize_cols was dropped because the keyword no longer exists in pandas.
# - low_memory=False reads the file in one pass instead of in chunks.
# NOTE(review): with index_col=False there is no index column for
# parse_dates=True to act on, so 'Date' presumably stays as text here -- confirm.
accidents = pd.read_csv(accidents_data_file,
                        sep=',',
                        header=0,
                        index_col=False,
                        parse_dates=True,
                        on_bad_lines='warn',
                        skip_blank_lines=True,
                        low_memory=False
                        )

Weather Conditions Distribution


In [ ]:
# Histogram of the weather-condition codes, saved as a PNG for the dashboard.
weather_conditions = accidents['Weather_Conditions']

fig = plt.figure()
ax = fig.add_subplot(111)

# Draw the histogram exactly once. Previously ax.hist was called twice on the
# same axes, so the data was binned twice and the first set of bars was simply
# painted over. The explicit range (min..max of the column) and the colours of
# the two former calls are combined here.
counts, bins, patches = ax.hist(weather_conditions,
                                range=(weather_conditions.min(),
                                       weather_conditions.max()),
                                facecolor='green',
                                edgecolor='gray')

# Put a tick on every bin edge so the bars line up with the axis labels.
ax.set_xticks(bins)
ax.set_title('Weather Conditions Distribution')
ax.set_xlabel('Weather Condition')
ax.set_ylabel('Count of Weather Condition')

# Save through the figure object rather than pyplot's "current figure".
fig.savefig('dashboard/charts/weather-conditions-distribution.png')

Light Conditions


In [ ]:
# Box plot of the light-condition codes, drawn on its own figure.
# Without `by` or `ax`, DataFrame.boxplot draws on pyplot's current axes, and
# nothing in this notebook closes the histogram figure created earlier, so the
# box plot would be drawn on top of that chart. Passing a fresh Axes avoids it.
light_fig, light_ax = plt.subplots()
accidents.boxplot(column='Light_Conditions',
                  ax=light_ax,
                  return_type='dict')
light_fig.savefig('dashboard/charts/light-conditions-boxplot.png')

In [ ]:
# Box plots of the light conditions, one box per weather-condition value.
# plt.savefig then writes whichever figure is current after the boxplot call.
lc_by_wc_options = dict(column='Light_Conditions',
                        by='Weather_Conditions',
                        return_type='dict')
accidents.boxplot(**lc_by_wc_options)
plt.savefig('dashboard/charts/lc-by-wc-boxplot.png')

Time Series Analysis


In [ ]:
# Build a dataframe holding the total number of casualties for each date.
# The aggregation is named by the string 'sum' rather than np.sum: passing the
# NumPy callable to groupby.agg is deprecated in recent pandas and emits a
# FutureWarning, while 'sum' selects the same groupby sum.
casualty_count = accidents.groupby('Date').agg({'Number_of_Casualties': 'sum'})

# Convert the index to a DatetimeIndex so it can be sliced by date strings.
# NOTE(review): no `format`/`dayfirst` is given; if the Date column holds
# day-first text (e.g. dd/mm/yyyy), ambiguous dates could be misread --
# confirm against the CSV.
casualty_count.index = pd.to_datetime(casualty_count.index)

# Sort chronologically so the plots look correct. Reassigning (instead of
# inplace=True) keeps the cell a plain sequence of assignments.
casualty_count = casualty_count.sort_index(ascending=True)

In [ ]:
# Line chart of the casualty totals over the full date range, saved as a PNG
all_years_ax = casualty_count.plot(figsize=(18, 4))
all_years_ax.figure.savefig('dashboard/charts/casualty-count-all.png')

In [ ]:
# Plot one year of the data.
# Rows are selected with .loc['2000'] (partial-string match on the
# DatetimeIndex). casualty_count['2000'] is treated as a column lookup by
# current pandas and raises KeyError.
casualty_count.loc['2000'].plot(figsize=(18, 4))
plt.savefig('dashboard/charts/casualty-count-2000.png')

In [ ]:
# Yearly casualty totals for 1980 through 1989, plotted and saved as a PNG
eighties_rows = casualty_count.loc['1980-01-01':'1989-12-31']
the1980s = eighties_rows.groupby(eighties_rows.index.year).sum()
the1980s.plot(figsize=(18, 4))
plt.savefig('dashboard/charts/casualty-count-1980s.png')

In [ ]: